# Built with Python 3.5.2
# Learned from https://machinelearningmastery.com/implement-simple-linear-regression-scratch-python/
# Linear regression implemented in plain Python, without relying on other libraries
import math
import matplotlib.pyplot as plt
# Sample data stored as two parallel lists: data['X'] holds the x-values and
# data['Y'] the matching y-values (63 entries each, paired by position).
data={'X':[108,19,13,124,40,57,23,14,45,10,5,48,11,23,7,2,24,6,3,23,6,9,9,3,29,7,4,20,7,4,0,25,6,5,22,11,61,12,4,16,13,60,41,37,55,41,11,27,8,3,17,13,13,15,8,29,30,24,9,31,14,53,26]
      ,'Y':[392.5,46.2,15.7,422.2,119.4,170.9,56.9,77.5,214,65.3,20.9,248.1,23.5,39.6,48.8,6.6,134.9,50.9,4.4,113,14.8,48.7,52.1,13.2,103.9,77.5,11.8,98.1,27.9,38.1,0,69.2,14.6,40.3,161.5,57.2,217.6,58.1,12.6,59.6,89.9,202.4,181.3,152.8,162.8,73.4,21.3,92.6,76.1,39.9,142.1,93,31.9,32.1,55.6,133.3,194.5,137.9,87.4,209.8,95.5,244.6,187.5]}

# Mean and (unnormalised) variance helpers
def mean(value):
    """Return the arithmetic mean of the numbers in ``value``.

    Raises ZeroDivisionError when ``value`` is empty.
    """
    total = sum(value)
    return total / len(value)

def variance(value, mean):
    """Return the sum of squared deviations of ``value`` from ``mean``.

    The total is not divided by the number of samples.
    """
    return sum((sample - mean) ** 2 for sample in value)

# Covariance
def covariance(x, mean_x, y, mean_y):
    """Return the sum of products of the paired deviations of ``x`` and ``y``.

    Each ``x[i] - mean_x`` is multiplied by the matching ``y[i] - mean_y``
    and the products are added up over all pairs. The total is not divided
    by the number of samples.

    Returns 0.0 for empty input.
    Raises ValueError when ``x`` and ``y`` differ in length.
    """
    if len(x) != len(y):
        raise ValueError('x and y must have the same length')
    covar = 0.0
    for x_i, y_i in zip(x, y):
        covar += (x_i - mean_x) * (y_i - mean_y)
    # Return only once every pair has been accumulated (returning from
    # inside the loop would stop after the first pair).
    return covar

# Compute the regression coefficients
def coeffients(dataset):
    """Fit the line ``y = b0 + b1 * x`` to ``dataset`` and return ``[b0, b1]``.

    Every row of ``dataset`` supplies x at index 0 and y at index 1.
    """
    inputs = [sample[0] for sample in dataset]
    targets = [sample[1] for sample in dataset]
    input_mean = mean(inputs)
    target_mean = mean(targets)
    # Slope (weight): co-deviation of x and y over the squared deviation of x.
    co_deviation = covariance(inputs, input_mean, targets, target_mean)
    slope = co_deviation / variance(inputs, input_mean)
    # Intercept (bias): makes the line pass through the point of means.
    intercept = target_mean - slope * input_mean
    return [intercept, slope]

# Prediction
def prediction(train, test):
    """Fit a line on ``train`` and return its predictions for ``test``.

    The coefficients come from ``coeffients(train)``; only index 0 of each
    ``test`` row is read. Returns a list with one predicted value per row.
    """
    intercept, slope = coeffients(train)
    return [intercept + slope * sample[0] for sample in test]

# Root mean squared error
def root_mean_square_error(actual, predicted):
    """Return the root mean squared error between ``actual`` and ``predicted``.

    Values are paired by index over the length of ``actual``.
    Raises ZeroDivisionError when ``actual`` is empty.
    """
    squared_total = 0.0
    for idx, truth in enumerate(actual):
        residual = predicted[idx] - truth
        squared_total += residual ** 2
    return math.sqrt(squared_total / len(actual))

# Evaluate a regression algorithm on the training set
def evaluate_algorithm(dataset, algorithm):
    """Score ``algorithm`` on the same ``dataset`` it is trained on.

    A copy of every row is made with its last element replaced by ``None``
    so the algorithm cannot read the target. ``algorithm(dataset, copies)``
    is then called, its predictions are printed, and the RMSE against the
    last element of each original row is returned.
    """
    def hide_target(row):
        # Work on a copy so the caller's row is left untouched.
        masked = list(row)
        masked[-1] = None
        return masked

    masked_rows = [hide_target(row) for row in dataset]
    predicted = algorithm(dataset, masked_rows)
    print(predicted)
    targets = [row[-1] for row in dataset]
    return root_mean_square_error(targets, predicted)

# evaluate_algorithm expects one (x, y) row per sample, so pair the parallel
# X and Y lists position by position instead of passing them as two long rows.
result=evaluate_algorithm(list(zip(data['X'],data['Y'])),prediction)
print(result)
# Scatter plot of the raw samples.
plt.scatter(data['X'],data['Y'],color='orange')
plt.show()